import pandas as pd

# Name of the label column that is split off from the features.
_LABEL_COLUMN = 'Attrition'

# Columns removed from both tables before the feature/label split.
# 'StandardHours' was flagged by the original author as holding the same
# value in every row; the remaining four were judged to be unhelpful features.
# NOTE: an earlier experiment additionally dropped 'NumCompaniesWorked',
# 'PercentSalaryHike', 'PerformanceRating', 'RelationshipSatisfaction',
# 'TrainingTimesLastYear', 'WorkLifeBalance' and 'YearsSinceLastPromotion';
# those columns are currently kept.
_DROPPED_COLUMNS = ['StandardHours', 'Gender', 'Over18', 'Education', 'EmployeeNumber']


def _split_features_and_label(frame):
    """Drop the unused columns from *frame* and return ``(features, label)``."""
    # Applying the exact same column list to every table keeps the train and
    # test feature sets aligned.
    cleaned = frame.drop(columns=_DROPPED_COLUMNS)
    features = cleaned.drop(columns=_LABEL_COLUMN)
    label = cleaned[_LABEL_COLUMN]
    return features, label


def dataload(train_path='../data/train.csv', test_path='../data/test.csv'):
    """Load the train and test CSV files and split them into features and labels.

    Both files are read with ``pandas.read_csv``. The columns listed in
    ``_DROPPED_COLUMNS`` are removed from each table, and the ``'Attrition'``
    column is separated out as the label.

    Args:
        train_path: Location of the training CSV. Defaults to the path the
            function originally hard-coded, so existing ``dataload()`` calls
            behave as before.
        test_path: Location of the test CSV. Same default behaviour.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``: the two feature
        DataFrames followed by the two ``'Attrition'`` Series.

    Raises:
        FileNotFoundError: If either CSV file does not exist.
        KeyError: If a table lacks one of the dropped columns or the
            ``'Attrition'`` column.
    """
    # Read the training data and the test data.
    train_data = pd.read_csv(train_path)
    test_data = pd.read_csv(test_path)

    # Split each table into features and label.
    X_train, y_train = _split_features_and_label(train_data)
    X_test, y_test = _split_features_and_label(test_data)
    return X_train, X_test, y_train, y_test